SDG14

Author

Ignacio Saldivia Gonzatti

Published

February 7, 2023

Import packages and set things up

Code
import numpy as np
import pandas as pd
import os
import csv
import composite as ci
import inspect
from functools import reduce
import plotly.express as px

# from xml.etree import ElementTree as ET
# import requests
# import json
# import webbrowser
Code
# for VSC users, Latex in Plotly is not working
# Workaround from https://github.com/microsoft/vscode-jupyter/issues/8131
import plotly
from IPython.display import display, HTML

plotly.offline.init_notebook_mode()
# Inject the MathJax script tag into the notebook front-end so LaTeX inside
# Plotly figures renders in VS Code (the runtime behavior of the HTML string
# must stay byte-identical).
display(HTML(
    '<script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG"></script>'
))
Code
# show all output from cells
from IPython.core.interactiveshell import InteractiveShell

# "all": echo every top-level expression in a cell, not only the final one
# (the default is "last_expr"); lets the bare `df[[...]]` lines below display.
InteractiveShell.ast_node_interactivity = "all"  # last_expr
pd.set_option('display.max_columns', 20)
Code
# Create the output folder if it is not there yet.
# exist_ok=True makes the call idempotent and removes the race between the
# exists() check and makedirs() that the original two-step version had.
os.makedirs("../output", exist_ok=True)

Country names dict

Code
# Load the country-name -> abbreviation dictionary from a two-column CSV.
with open("../data/countryAbbr.csv") as csv_file:
    country_to_abbrev = dict(csv.reader(csv_file))
# Invert it: abbreviation -> full country name.
abbrev_to_country = {abbr: name for name, abbr in country_to_abbrev.items()}
# Extra codes that appear in the source datasets.
abbrev_to_country["EL"] = "Greece"
abbrev_to_country["GB"] = "United Kingdom"
Code
# List of all European country names, from the "name" column of the lookup CSV.
allEurope = pd.read_csv("../data/Countries-Europe.csv")["name"].to_list()
Code
# fmt: off
countries = [
    "Belgium", "Bulgaria", "Cyprus", "Greece", "Germany", "Croatia",
    "Italy", "Denmark", "Estonia", "Spain", "Finland", "France",
    "Ireland", "Lithuania", "Latvia", "Malta", "Netherlands", "Poland",
    "Portugal", "Romania", "Sweden", "United Kingdom",]
# fmt: on
# Abbreviation for each country where one exists; otherwise keep the name itself.
countryAbb = [country_to_abbrev.get(x, x) for x in countries]

Define general functions

Code
def missingCountries(df, countries=countries):
    """Print which of *countries* are absent from df's index.

    Membership is tested against df.index directly (identical to testing
    against df.index.unique(), without building the intermediate Index).
    Prints "No missing countries" when every country is present.
    """
    missing = [c for c in countries if c not in df.index]
    if missing:
        print("Missing countries:\n", "\n".join(missing))
    else:
        print("No missing countries")

def negativeValues(df):
    """Clip negative entries of *df* to 0 in place, printing a notice if any were found.

    (df < 0).any().any() tests for negatives without materializing the masked
    frame the original `df[df < 0].count().sum() > 0` built; NaNs compare
    False and are left untouched, matching the original behavior.
    """
    if (df < 0).any().any():
        df[df < 0] = 0
        print("Negative values in dataset replaced with 0")

SDG Official Data

Code
# SDG14 indicators from the UNstats
# https://unstats.un.org/sdgs/dataportal/database

sdg14 = pd.read_excel("../data/Goal14_april2023.xlsx", sheet_name=0)
# fmt: off
sdg14 = sdg14[
    [
        "Target", "Indicator", "SeriesCode", "GeoAreaName",
        "TimePeriod", "Value", "Units", "SeriesDescription"
    ]
]
# fmt: on

# "N" marks not-available values. Assign the cleaned column back instead of
# calling replace(..., inplace=True) on the column attribute: the chained
# in-place call operates on an intermediate Series and is deprecated/unsafe
# (SettingWithCopy) in recent pandas.
sdg14["Value"] = sdg14["Value"].replace("N", np.nan).astype(np.float64)

# filter countries
# sdg14 = sdg14[sdg14['GeoAreaName'].isin(countries)]
# show indicators: one row per (Indicator, SeriesCode, description, unit) with counts
pd.DataFrame(
    sdg14.groupby(["Indicator", "SeriesCode", "SeriesDescription", "Units"]).size()
)
0
Indicator SeriesCode SeriesDescription Units
14.1.1 EN_MAR_BEALITSQ Beach litter per square kilometer (Number) NUMBER 611
EN_MAR_BEALIT_BP Beach litter originating from national land-based sources that ends in the beach (%) PERCENT 900
EN_MAR_BEALIT_BV Beach litter originating from national land-based sources that ends in the beach (Tonnes) TONNES 900
EN_MAR_BEALIT_EXP Exported beach litter originating from national land-based sources (Tonnes) TONNES 900
EN_MAR_BEALIT_OP Beach litter originating from national land-based sources that ends in the ocean (%) PERCENT 900
EN_MAR_BEALIT_OV Beach litter originating from national land-based sources that ends in the ocean (Tonnes) TONNES 900
EN_MAR_CHLANM Chlorophyll-a anomaly, remote sensing (%) PERCENT 2784
EN_MAR_CHLDEV Chlorophyll-a deviations, remote sensing (%) PERCENT 4131
EN_MAR_PLASDD Floating plastic debris density (count per km2) NUMBER 3
14.2.1 EN_SCP_ECSYBA Number of countries using ecosystem-based approaches to managing marine areas (1 = YES; 0 = NO) NUMBER 33
14.3.1 ER_OAW_MNACD Average marine acidity (pH) measured at agreed suite of representative sampling stations PH 903
14.4.1 ER_H2O_FWTL Proportion of fish stocks within biologically sustainable levels (not overexploited) (%) PERCENT 422
14.5.1 ER_MRN_MPA Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%) PERCENT 6049
14.6.1 ER_REG_UNFCIM Progress by countries in the degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.7.1 EN_SCP_FSHGDP Sustainable fisheries as a proportion of GDP PERCENT 5880
14.a.1 ER_RDE_OSEX National ocean science expenditure as a share of total research and development funding (%) PERCENT 159
14.b.1 ER_REG_SSFRAR Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small-scale fisheries (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.c.1 ER_UNCLOS_IMPLE Score for the implementation of UNCLOS and its two implementing agreements (%) PERCENT 63
ER_UNCLOS_RATACC Score for the ratification of and accession to UNCLOS and its two implementing agreements (%) PERCENT 63

Check official data by indicator

Code
# check sdg indicator, change SeriesCode to the one you want to check
# .copy() so the .loc assignment below edits this frame rather than a view of
# sdg14 (avoids the SettingWithCopy hazard of assigning into a filtered slice)
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)].copy()
# normalize the UN's long UK name (contains both "United" and "Kingdom")
sdg14check.loc[
    sdg14check["GeoAreaName"].str.contains(r"^(?=.*United)(?=.*Kingdom)"), "GeoAreaName"
] = "United Kingdom"
sdg14check = sdg14check[sdg14check["SeriesCode"] == "EN_MAR_BEALITSQ"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
sdg14check
missingCountries(sdg14check)
TimePeriod 2015 2016 2017 2018 2019 2020
GeoAreaName
Belgium 1.520290e+07 2.704512e+05 2.273158e+05 1.915215e+04 6.645361e+05 NaN
Bulgaria 2.319362e+05 1.924616e+05 2.292929e+06 2.618936e+06 2.589610e+06 2.159091e+06
Croatia 4.538235e+07 4.292519e+05 NaN NaN 5.534965e+04 NaN
Cyprus 3.804995e+05 1.230420e+06 4.879745e+06 4.901154e+06 NaN 1.879470e+07
Denmark 1.289859e+05 1.961387e+05 6.390451e+04 2.080706e+05 2.490390e+05 1.977920e+05
Estonia NaN NaN 1.058452e+06 NaN NaN NaN
Finland NaN NaN 2.172614e+04 1.652134e+05 NaN NaN
France 6.693829e+06 4.550833e+06 1.906840e+05 1.066566e+06 1.329666e+06 6.046173e+05
Germany 1.010614e+05 2.106574e+06 4.334803e+04 3.861386e+05 1.695590e+04 NaN
Greece 2.866051e+06 1.806335e+06 1.039325e+06 1.432935e+06 1.515755e+04 7.201374e+04
Ireland 1.666727e+04 1.233589e+05 2.938770e+05 2.351199e+04 4.400202e+02 NaN
Italy 9.195292e+06 7.008116e+06 2.331030e+06 6.977531e+05 2.116006e+05 NaN
Malta 1.205128e+06 NaN NaN 5.260832e+03 NaN NaN
Netherlands 3.870849e+06 4.560344e+04 5.221688e+04 1.038472e+05 7.550317e+03 NaN
Poland NaN 8.504996e+05 2.077893e+05 7.328124e+05 NaN NaN
Portugal 3.096130e+06 1.237300e+06 2.573624e+05 1.537330e+06 1.605057e+05 1.029442e+04
Romania 1.460573e+05 2.776233e+06 1.341007e+07 7.194247e+05 NaN NaN
Spain 1.531321e+06 4.088206e+06 2.193543e+06 1.436452e+06 1.257716e+06 2.463317e+05
Sweden 7.623596e+03 2.239108e+04 1.722629e+04 6.489057e+03 NaN NaN
Missing countries:
 Lithuania
Latvia
United Kingdom

14.1

(a) Index of coastal eutrophication

We use Gross Nitrogen Balance: Max-Min transformation where the max-value is the maximum value in the period 2012-2021 and the min-value is zero.

Gross nutrient balance (Nitrogen kg/ha), Eurostat

Source

Code
# Gross nutrient balance (BAL_UAA, Nitrogen kg/ha), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/AEI_PR_GNB__custom_153613/

nitro = pd.read_csv("../data/aei_pr_gnb_page_linear.csv")
nitro["geo"] = nitro["geo"].map(abbrev_to_country).fillna(nitro["geo"])
nitro = nitro[nitro["geo"].isin(countries)]
nitro = nitro.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)
# convert negative values to 0
negativeValues(nitro)
mr = nitro.columns[-1]  # most recent year
# TIME_PERIOD pivots to integer column labels (the int selection
# nitro[[2012, 2016, mr]] below relies on this), so slice with int 2012
# rather than the string "2012" for consistency with the other cells.
# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
span = nitro.loc[:, 2012:mr]
nitro = (
    (span.max().max() - span) / (span.max().max() - span.min().min()) * 100
).round(2)
# nitro.ffill(axis=1, inplace=True)
nitro[[2012, 2016, mr]]

missingCountries(nitro)
Negative values in dataset replaced with 0
No missing countries
TIME_PERIOD 2012 2016 2019
geo
Belgium 26.99 NaN NaN
Bulgaria 87.65 63.72 85.31
Croatia 55.97 81.94 72.76
Cyprus 5.92 NaN NaN
Denmark 57.45 NaN NaN
Estonia 85.71 NaN NaN
Finland 75.77 75.82 77.70
France 88.01 75.77 80.61
Germany 61.68 64.77 72.76
Greece 74.39 NaN NaN
Ireland 82.09 72.40 NaN
Italy 59.39 67.04 NaN
Latvia 87.76 87.04 93.67
Lithuania 85.10 NaN 79.18
Malta 28.32 NaN NaN
Netherlands 13.27 0.77 15.41
Poland 75.46 77.50 75.82
Portugal 77.60 76.17 76.94
Romania 91.68 100.00 100.00
Spain 82.86 80.10 NaN
Sweden 84.08 81.28 86.68
United Kingdom 55.36 55.87 NaN

(b) Floating Plastic Debris Density

We use two indicators:

  1. Plastic Waste kg/ha: Max-Min Transformation where the max- and min-values are the maximum and minimum in the period 2012-2021.

  2. Recovery Rate of Plastic Packaging: Without further transformation as score is provided dimensionless.

1. Plastic Waste kg/ha, Eurostat

Source

Code
# Plastic Waste kg/ha, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_WASGEN/

wasteG = pd.read_csv("../data/env_wasgen.csv")
wasteG["geo"] = wasteG["geo"].map(abbrev_to_country).fillna(wasteG["geo"])
wasteG = wasteG[wasteG["geo"].isin(countries)]

# keep only the kilograms-per-inhabitant unit
wasteG = wasteG[wasteG["unit"] == "KG_HAB"].pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)
mr = wasteG.columns[-1]  # most recent year
negativeValues(wasteG)
# int year labels for consistency with the wasteG[[2012, 2016, mr]] selection
# below (pivoted TIME_PERIOD columns are ints, so avoid the string "2012").
# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
span = wasteG.loc[:, 2012:mr]
wasteG = (
    (span.max().max() - span) / (span.max().max() - span.min().min()) * 100
).round(2)
wasteG = wasteG.ffill(axis=1)  # fill empty values with last available year
wasteG[[2012, 2016, mr]]
missingCountries(wasteG)
TIME_PERIOD 2012 2016 2020
geo
Belgium 54.62 51.26 31.93
Bulgaria 92.44 78.99 59.66
Croatia 96.64 90.76 78.15
Cyprus 100.00 98.32 88.24
Denmark 88.24 87.39 84.03
Estonia 89.92 76.47 77.31
Finland 89.92 90.76 84.03
France 83.19 79.83 73.95
Germany 78.15 76.47 73.11
Greece 94.12 84.03 90.76
Ireland 82.35 78.15 70.59
Italy 64.71 49.58 35.29
Latvia 94.96 77.31 54.62
Lithuania 89.92 78.15 70.59
Malta 95.80 88.24 89.92
Netherlands 75.63 78.15 73.11
Poland 83.19 75.63 54.62
Portugal 89.92 78.99 68.07
Romania 84.03 92.44 90.76
Spain 84.03 90.76 88.24
Sweden 89.08 77.31 76.47
United Kingdom 73.11 66.39 67.23
No missing countries
2. Recovery rates for packaging waste, Plastic Packaging

Source

Code
# Recovery rates for packaging waste, Plastic Packaging, Percentage, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ten00062/

wasteR = pd.read_csv("../data/ten00062.csv")
wasteR["geo"] = wasteR["geo"].map(abbrev_to_country).fillna(wasteR["geo"])
# filter to the analysis countries and pivot to a country x year matrix
wasteR = wasteR.loc[wasteR["geo"].isin(countries)].pivot_table(
    index="geo", columns="TIME_PERIOD", values="OBS_VALUE", aggfunc="mean"
)
negativeValues(wasteR)
mr = wasteR.columns[-1]  # most recent year
# already a 0-100 score, so no max-min transformation; only forward-fill gaps
wasteR = wasteR.ffill(axis=1)
wasteR[[2012, 2016, mr]]
missingCountries(wasteR)
TIME_PERIOD 2012 2016 2020
geo
Belgium 92.7 99.5 99.4
Bulgaria 44.1 52.6 50.6
Croatia 45.4 41.1 34.1
Cyprus 44.8 74.3 51.9
Denmark 99.4 98.1 73.1
Estonia 44.0 85.8 87.4
Finland 51.0 97.2 99.4
France 64.0 64.5 71.8
Germany 99.7 99.8 99.9
Greece 32.2 40.2 37.6
Ireland 74.3 79.7 100.0
Italy 71.8 83.9 88.4
Latvia 38.7 41.8 47.9
Lithuania 38.9 74.4 63.4
Malta 33.3 23.7 10.2
Netherlands 98.1 95.8 94.9
Poland 26.2 54.8 45.0
Portugal 39.4 49.9 56.9
Romania 51.9 49.9 37.0
Spain 53.2 61.8 55.5
Sweden 58.1 61.7 50.7
United Kingdom 38.1 58.5 56.2
No missing countries
Marine waters affected by eutrophication

Source

Code
# Marine waters affected by eutrophication (km2 and % EEZ), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/sdg_14_60/default/table?lang=en

eutro = pd.read_csv("../data/sdg_14_60_linear.csv")
eutro["geo"] = eutro["geo"].map(abbrev_to_country).fillna(eutro["geo"])
# keep the absolute-area series; use unit == "PC" instead for the %-of-EEZ series
keep = eutro["geo"].isin(countries) & (eutro["unit"] == "KM2")
eutro = eutro[keep].pivot_table(
    index="geo", columns="TIME_PERIOD", values="OBS_VALUE", aggfunc="mean"
)
mr = eutro.columns[-1]  # most recent year
eutro[[2012, 2016, mr]]
missingCountries(eutro)
TIME_PERIOD 2012 2016 2022
geo
Belgium 0.0 0.0 0.0
Bulgaria 0.0 3.0 0.0
Croatia 6.0 122.0 11.0
Cyprus 0.0 20.0 10.0
Denmark 0.0 21.0 2571.0
Estonia 99.0 66.0 0.0
Finland 222.0 304.0 1053.0
France 0.0 104.0 1316.0
Germany 0.0 0.0 11.0
Greece 48.0 580.0 97.0
Ireland 0.0 0.0 256.0
Italy 0.0 376.0 0.0
Latvia 25.0 37.0 0.0
Lithuania 1.0 14.0 0.0
Malta 0.0 0.0 0.0
Netherlands 0.0 0.0 112.0
Poland 12.0 30.0 0.0
Portugal 8989.0 4840.0 33018.0
Romania 0.0 3.0 0.0
Spain 1716.0 1817.0 6058.0
Sweden 109.0 343.0 10407.0
Missing countries:
 United Kingdom

14.2

Proportion of national exclusive economic zones managed using ecosystem-based approaches

As of now, it is a binary variable: Number of countries using ecosystem-based approaches to manage marine areas

Data is scarce and the next round of data collection is in 2025. More information here

Spatial comparison within the EU is not expected to yield great differences and we neglect target 14.2.

14.3

Average marine acidity (pH) measured at agreed suite of representative sampling stations

We use two indicators:

  1. Greenhouse gas emissions under the Effort Sharing Decision (ESD): Max-Min transformation where the max- and min-values are the maximum and minimum in the assessment period (2012-2021)

  2. Carbon Emissions per capita: Max-Min Transformation where the max- and min-values are the maximum and minimum in the period 2012-2021.

1. Greenhouse gas emissions under the Effort Sharing Decision (ESD)

Several sources (see code)

Code
# Greenhouse gas emissions under the Effort Sharing Decision (ESD),
# Million tonnes CO2 equivalent (Mt CO2 eq), European Environment Agency
# https://www.eea.europa.eu/data-and-maps/data/esd-4

ghgESD = pd.read_excel("../data/EEA_ESD-GHG_2022.xlsx", sheet_name=1, skiprows=11)
ghgESD = (
    ghgESD[ghgESD["Geographic entity"].isin(countries)]
    .set_index("Geographic entity")
    .dropna(axis=1, how="all")
)
negativeValues(ghgESD)
mr = ghgESD.columns[-1]  # most recent year
ghgESD = ghgESD * 1_000_000  # Mt -> tonnes
Code
# Build ESD allocation with two sources and interpolation for years previous to 2013

# Allocation for 2020 target
# https://ec.europa.eu/clima/ets/esdAllocations.do?languageCode=en&esdRegistry=-1&esdYear=&search=Search&currentSortSettings=
allocation2020 = pd.read_xml(
    "../data/esdAllocations2020.xml", xpath=".//ESDAllocations/ESDAllocationInfo"
)
# map member-state codes to full country names (codes without a mapping pass through)
allocation2020["Country"] = (
    allocation2020["ESDMemberState"]
    .map(abbrev_to_country)
    .fillna(allocation2020["ESDMemberState"])
)
allocation2020 = allocation2020[allocation2020["Country"].isin(countries)]
allocation2020 = allocation2020.pivot_table(
    columns="ESDYear", index="Country", values="Allocation", aggfunc="mean"
)

# Allocation for 2030 target
# https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32020D2126
# table index 13 of the saved HTML; promote its first row to the header
allocation2030 = pd.read_html("../data/esdAllocations2030.html")[13][1:]
allocation2030.columns = allocation2030.iloc[0]
allocation2030 = allocation2030[1:]
# normalize the long official name to plain "Netherlands"
allocation2030.loc[
    allocation2030["Member State"].str.contains("Netherlands"), "Member State"
] = "Netherlands"
allocation2030 = allocation2030[allocation2030["Member State"].isin(countries)]
allocation2030.set_index("Member State", inplace=True)
# strip non-breaking spaces (\xa0) from the number strings so astype(int) works
for col in allocation2030.columns:
    allocation2030[col] = allocation2030[col].apply(
        lambda x: str(x).replace("\xa0", "")
    )
allocation2030 = allocation2030.astype(int)

# Merge 2005 values with 2020 and 2030 allocations. Interpolate for years 2006-2012
allocationESD = ghgESD[[2005]].merge(
    allocation2020.merge(
        allocation2030, left_index=True, right_index=True, how="outer"
    ),
    left_index=True,
    right_index=True,
    how="outer",
)
allocationESD.columns = allocationESD.columns.astype(int)
allocationESD[list(range(2006, 2013))] = np.nan  # create empty columns for 2006-2012
allocationESD = allocationESD[list(range(2005, 2031))]  # order columns 2005..2030
allocationESD.interpolate(axis=1, inplace=True)  # linear fill across the year axis

# Calculate score for ESD: 100 when emissions are at the allocation, lower when above it
scoreESD = 100 - (100 * (ghgESD - allocationESD) / allocationESD)
scoreESD[scoreESD > 100] = 100  # cap over-achievement at 100
scoreESD = scoreESD.ffill(axis=1)  # fill empty values with last available year
scoreESD[[2013, 2016, 2021]]
missingCountries(scoreESD)
2013 2016 2021
Geographic entity
Belgium 100.000000 99.633951 100.000000
Bulgaria 100.000000 100.000000 100.000000
Croatia 100.000000 100.000000 100.000000
Cyprus 100.000000 100.000000 86.945820
Denmark 100.000000 100.000000 100.000000
Estonia 100.000000 100.000000 100.000000
Finland 100.000000 96.549221 100.000000
France 100.000000 100.000000 100.000000
Germany 100.000000 99.619542 100.000000
Greece 100.000000 100.000000 100.000000
Ireland 100.000000 99.312005 96.146602
Italy 100.000000 100.000000 100.000000
Latvia 100.000000 100.000000 100.000000
Lithuania 100.000000 100.000000 100.000000
Malta 92.959862 85.673820 100.000000
Netherlands 100.000000 100.000000 100.000000
Poland 100.000000 99.344500 100.000000
Portugal 100.000000 100.000000 100.000000
Romania 100.000000 100.000000 100.000000
Spain 100.000000 100.000000 100.000000
Sweden 100.000000 100.000000 100.000000
United Kingdom 100.000000 100.000000 100.000000
No missing countries

Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)

Code
# Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)
# Distance to a fixed reference year: every year up to 2020 is compared against
# the 2020 allocation, and 2021 onward against the 2030 allocation.

scoreESD2020 = 100 - 100 * (
    ghgESD.loc[:, :2020].subtract(allocationESD[2020], axis=0)
).divide(allocationESD[2020], axis=0)
scoreESD2030 = 100 - 100 * (
    ghgESD.loc[:, 2021:].subtract(allocationESD[2030], axis=0)
).divide(allocationESD[2030], axis=0)
scoreESD1 = scoreESD2020.merge(scoreESD2030, left_index=True, right_index=True)
scoreESD1[scoreESD1 > 100] = 100  # cap at 100, same convention as scoreESD
# scoreESD1[[2012, 2018, 2021]]

2. Carbon emissions per capita

Several sources (see code)

Code
# Population on 1 January, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/tps00001/

pop = pd.read_csv("../data/tps00001.csv")
pop["geo"] = pop["geo"].map(abbrev_to_country).fillna(pop["geo"])
# country x year population matrix
pop = pop[pop["geo"].isin(countries)].pivot_table(
    index="geo", columns="TIME_PERIOD", values="OBS_VALUE", aggfunc="mean"
)
Code
# CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/

co2 = pd.read_csv("../data/env_air_gge.csv")
co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])
co2 = co2[co2["geo"].isin(countries)]
co2 = co2.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)

mr = co2.columns[-1]  # most recent year
co2pc = co2 / pop * 1000 * 1000  ## tonnes co2 per capita (thousand tonnes -> tonnes)

# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
co2pc = (
    (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])
    / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())
    * 100
).round(2)

# NOTE(review): the United Kingdom row is all NaN in the rendered output, yet
# missingCountries() reports nothing missing because the (empty) row still
# exists in the index — verify the ENV_AIR_GGE extract actually covers the UK.
co2pc[[2012, 2016, mr]]

missingCountries(co2pc)
TIME_PERIOD 2012 2016 2021
geo
Belgium 42.05 45.74 53.10
Bulgaria 64.56 65.88 56.04
Croatia 78.36 80.32 79.02
Cyprus 25.37 21.33 28.96
Denmark 46.29 51.71 63.46
Estonia 17.51 9.15 41.36
Finland 54.35 52.19 57.25
France 71.19 74.19 78.78
Germany 44.53 47.30 58.87
Greece 46.28 58.48 59.09
Ireland 25.41 22.20 29.20
Italy 67.06 74.56 75.56
Latvia 86.16 79.85 69.73
Lithuania 80.85 79.24 74.57
Malta 47.73 79.86 81.87
Netherlands 30.39 32.16 47.20
Poland 47.12 47.37 44.51
Portugal 73.10 71.04 81.12
Romania 81.28 88.51 86.97
Spain 62.10 67.35 73.73
Sweden 92.50 94.04 98.95
United Kingdom NaN NaN NaN
No missing countries
Code
# Data to corroborate the ENV_AIR_GGE figures: numbers are very close, and 2021 is covered.
# Fossil CO2 emissions by country (territorial), million tonnes of carbon per year
# (1 MtC = 1 million tonne of carbon = 3.664 million tonnes of CO2)
# https://globalcarbonbudget.org/carbonbudget/

co2T = pd.read_excel(
    "../data/National_Fossil_Carbon_Emissions_2022v1.0.xlsx", sheet_name=1, skiprows=11
)
# transpose so countries are rows; the original first row supplies the year labels
co2T = co2T.T
co2T.columns = co2T.iloc[0, :]
co2T.columns = co2T.columns.astype(int)
co2T = co2T.rename_axis(index="geo", columns=None)
co2T = co2T[co2T.index.isin(countries)]

mr = co2T.columns[-1]  # most recent year
co2T = co2T * 3.664  # carbon -> CO2
# overwrites the ENV_AIR_GGE-based co2pc from the previous cell
co2pc = co2T / pop * 1_000_000  ## tonnes co2 per capita

# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
rng = co2pc.loc[:, 2012:mr]
co2pc = ((rng.max().max() - rng) / (rng.max().max() - rng.min().min()) * 100).round(2)
co2pc = co2pc.ffill(axis=1)  # fill empty values with last available year
co2pc[[2012, 2016, mr]]

missingCountries(co2pc)
2012 2016 2021
geo
Belgium 47.69 51.35 55.72
Bulgaria 69.89 71.90 73.62
Croatia 87.80 89.00 88.46
Cyprus 54.53 52.28 54.06
Denmark 65.29 70.80 82.76
Estonia 12.53 13.24 59.32
Finland 45.77 53.09 68.23
France 79.37 83.14 87.31
Germany 40.23 43.51 57.15
Greece 56.07 69.72 81.01
Ireland 55.34 53.56 62.32
Italy 68.21 75.71 78.69
Latvia 94.41 94.54 93.06
Lithuania 85.17 86.47 83.59
Malta 70.46 100.00 97.23
Netherlands 42.63 43.76 57.51
Poland 53.21 53.55 52.37
Portugal 85.49 84.33 92.02
Romania 87.58 92.56 90.60
Spain 75.18 78.05 83.89
Sweden 83.74 88.28 96.28
United Kingdom 60.82 73.99 84.41
No missing countries

14.4

Proportion of fish stocks within biologically sustainable levels

We use two indicators:

  1. FMSY/F: catch-weighted average

  2. B/BMSY: catch-weighted average

1. FMSY/F

2. B/BMSY

14.5

Coverage of protected areas in relation to marine area

We consider two indicators:

  1. Coverage of protected areas in relation to marine areas: Distance to Reference transformation where the max-value is set to 30% and the min-value is 0%
  2. OHI ‘Biodiversity’ Index : No further transformation

1. Marine protected areas (% of territorial waters)

Source

Code
# Marine protected areas (% of territorial waters), World Bank aggregation of https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas
# https://data.worldbank.org/indicator/ER.MRN.PTMR.ZS

mpa = pd.read_csv("../data/API_ER.MRN.PTMR.ZS_DS2.csv", skiprows=4)
mpa = mpa[mpa["Country Name"].isin(countries)].set_index("Country Name")
mpa = mpa.dropna(axis=1, how="all")
mpa = mpa.drop(columns=["Country Code", "Indicator Name", "Indicator Code"])
negativeValues(mpa)
# distance-to-reference with a 30% target: x / 30 * 100 == x / 0.3, capped at 100
mpa = (mpa / 0.3).round(2)
mpa[mpa > 100] = 100
mpa = mpa.sort_index()
mpa[["2016", "2021"]]
missingCountries(mpa)
2016 2021
Country Name
Belgium 100.00 100.00
Bulgaria 27.01 27.03
Croatia 28.47 29.96
Cyprus 0.41 28.74
Denmark 59.49 61.07
Estonia 62.09 62.60
Finland 37.10 39.96
France 87.22 100.00
Germany 100.00 100.00
Greece 4.86 15.06
Ireland 7.77 7.77
Italy 29.28 32.45
Latvia 53.45 53.46
Lithuania 71.72 85.31
Malta 20.88 24.82
Netherlands 71.64 89.53
Poland 75.24 75.24
Portugal 27.86 56.06
Romania 28.09 77.00
Spain 29.10 42.52
Sweden 27.35 52.54
United Kingdom 67.45 100.00
No missing countries

2. OHI ‘Biodiversity’ Index

Source

Code
# OHI 'Biodiversity' Index
# https://oceanhealthindex.org/global-scores/data-download/

ohiBio = pd.read_csv("../data/scoresOHI.csv")
# one combined mask: analysis countries, Biodiversity goal, "score" dimension
keep = (
    ohiBio["region_name"].isin(countries)
    & (ohiBio["long_goal"] == "Biodiversity")
    & (ohiBio["dimension"] == "score")
)
ohiBio = ohiBio[keep].pivot_table(
    index="region_name", columns="scenario", values="value", aggfunc="mean"
)

ohiBio[[2012, 2018, 2022]]
missingCountries(ohiBio)
scenario 2012 2018 2022
region_name
Belgium 74.65 71.16 71.53
Bulgaria 76.12 69.96 66.19
Croatia 60.77 55.80 55.65
Cyprus 68.48 65.77 64.07
Denmark 72.56 69.75 71.17
Estonia 80.68 73.42 74.96
Finland 79.25 71.68 73.02
France 72.20 74.81 75.08
Germany 73.45 69.04 72.25
Greece 66.28 60.73 59.42
Ireland 67.64 67.55 66.89
Italy 68.72 65.31 64.08
Latvia 77.70 70.57 72.39
Lithuania 76.04 69.22 71.82
Malta 72.23 64.57 64.19
Netherlands 67.14 65.53 68.68
Poland 67.41 64.34 66.45
Portugal 75.73 72.12 72.07
Romania 82.49 72.39 70.55
Spain 72.29 69.36 68.31
Sweden 75.00 71.16 70.96
United Kingdom 68.30 72.90 71.45
No missing countries

14.6

Degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing

We use two indicators:

  1. Fishing Subsidies relative to Landings: Max-Min Transformation where the max-value and min-value are the maximum and minimum in the assessment period (2012-2021)
  2. TAC/Catch:
Code
# Load the OECD abbreviation -> name lookup from a two-column CSV.
with open("../data/oecdAbb.csv") as csv_file:
    oecdAbb = dict(csv.reader(csv_file))

1. Fishing Subsidies relative to Landings

Several sources

Code
# Fisheries Support Estimate
# https://stats.oecd.org/Index.aspx?DataSetCode=FISH_FSE

fse = pd.read_csv("../data/FISH_FSE.csv")
fse = fse[fse["Country"].isin(countries)]

# strip variable codes and delete parent variables to avoid double counting
# solution from https://stackoverflow.com/q/76183612/14534411

separator = "."  # NOTE(review): appears unused below; rsplit uses a literal "."
# vCode: everything before the last "." in Variable (presumably the dotted
# hierarchy code without its trailing label — verify against the raw data)
fse["vCode"] = fse.Variable.str.rsplit(".", n=1).str.get(0)

variableAll = fse.vCode.unique().tolist()

def is_parent(p, target):
    """Return True if *p* sits strictly below *target* in the dotted hierarchy.

    Equivalent to checking startswith + a "." immediately after the prefix:
    "I.A.1" is under "I.A", but "I.A" itself and "I.AB" are not.
    """
    return p.startswith(target + ".")


# Collect only leaf codes: after sorting, a parent code is immediately
# followed by its descendants, so keep `prev` only when the next code is NOT
# its child. The "" sentinel appended at the end flushes the final code.
vSupport = []
prev = ""
for s in sorted(variableAll) + [""]:
    if prev and not is_parent(s, prev):
        vSupport.append(prev)
    prev = s

# keep leaf codes (and rows with no code at all) to avoid double counting parents
fse = fse[(fse.vCode.isin(vSupport)) | (fse.vCode.isna())]

# we use US dollars because some countries use their own currency ('Danish Krone', 'Zloty', 'Swedish Krona')
fse = fse[fse.Measure == "US dollar"][["Country", "Variable", "Value", "Year", "Unit"]]

# we include I.B, I.C, I.D, I.E  II. SUPPORT FOR SERVICES TO THE SECTOR (except the ones specified below)
fse = fse[
    fse.Variable.str.startswith(("I.E.1", "II."))
    & ~fse.Variable.str.startswith(("II.E", "II.F", "II.G"))
]

# total support per country-year (sum of Value; non-numeric columns are dropped)
fse = fse.groupby(["Country", "Year"]).sum(numeric_only=True)
negativeValues(fse)
# fse = fse.pivot_table(columns='Year', index='Country', values='Value', aggfunc='mean')
# fse[[2012, 2016, 2020]]
Code
# Landings in USD
# https://data.oecd.org/fish/fish-landings.htm

landing = pd.read_csv("../data/fishLandingsOECD.csv")
landing["LOCATION"] = landing["LOCATION"].map(oecdAbb).fillna(landing["LOCATION"])
# analysis countries, total landings, valued in US dollars
keepRows = (
    landing["LOCATION"].isin(countries)
    & (landing["MEASURE"] == "USD")
    & (landing["SUBJECT"] == "TOT")
)
landing = landing[keepRows]
# align column names and index with the fse frame for the later merge
landing = landing.rename(
    columns={"LOCATION": "Country", "TIME": "Year", "Value": "Landing"}
)
landing = landing.set_index(["Country", "Year"])
Code
# merge subsidies-landings and calculate score
# (both frames carry a (Country, Year) MultiIndex; the on-keys resolve to
# those index levels)
fseLanding = pd.merge(
    fse, landing, how="left", left_on=["Country", "Year"], right_on=["Country", "Year"]
)
# subsidies as a share of the landed value
fseLanding["fseLanding"] = fseLanding.Value / fseLanding.Landing
fseLanding = fseLanding.pivot_table(
    columns="Year", index="Country", values="fseLanding", aggfunc="mean"
)
mr = fseLanding.columns[-1]  # most recent year
# fseLanding = fseLanding[~fseLanding.index.str.contains("Poland")] Poland is an outlier
# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
fseScore = (
    (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr])
    / (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr].min().min())
    * 100
).round(2)
fseScore = fseScore.ffill(axis=1)  # fill empty values with last available year
fseScore[[2012, 2016, mr]]


missingCountries(fseScore)
Year 2012 2016 2020
Country
Belgium 99.47 99.96 99.61
Denmark 98.66 98.88 98.66
Estonia 97.82 99.78 98.83
France 99.40 99.99 99.97
Germany NaN 100.00 99.78
Greece 99.72 100.00 99.30
Ireland NaN 99.88 99.21
Italy 99.54 99.16 99.49
Latvia NaN 99.31 99.86
Lithuania 99.60 99.14 99.86
Netherlands 99.94 99.89 99.96
Poland 54.66 49.16 26.98
Portugal 99.94 100.00 100.00
Spain 99.79 99.96 99.95
Sweden 96.51 95.76 95.86
United Kingdom 99.80 99.92 99.82
Missing countries:
 Bulgaria
Cyprus
Croatia
Finland
Malta
Romania
Code
# Alternative dataset. Difference is large
# https://www.sciencedirect.com/science/article/abs/pii/S0308597X16000026#bib4
sumaila2009 = pd.read_excel("../data/subsidies2016Sumaila.xlsx")
sumaila2009["Countries"] = sumaila2009["Countries"].str.strip()
# rows may carry either full names or abbreviations; keep both kinds...
sumaila2009 = sumaila2009[
    sumaila2009["Countries"].isin(countries) | sumaila2009["Countries"].isin(countryAbb)
]
# ...then map abbreviations to full names (full names pass through unchanged)
sumaila2009.Countries = sumaila2009.Countries.map(abbrev_to_country).fillna(
    sumaila2009.Countries
)
sumaila2009 = sumaila2009[["Countries", "ReEst_Subsidy2009", "SubType"]]
# total re-estimated 2009 subsidy per country (all SubTypes summed together)
sumaila2009 = sumaila2009.groupby(["Countries"]).sum(numeric_only=True)
# sumaila2009

missingCountries(sumaila2009)

sumaila2018 = pd.read_csv("../data/subsidies2019Sumaila.csv")
sumaila2018 = sumaila2018[["Country", "Constant 2018 USD", "Type"]]
sumaila2018 = sumaila2018[sumaila2018.Country.isin(countries)]
# total constant-2018-USD subsidy per country (all Types summed together)
sumaila2018 = sumaila2018.groupby(["Country"]).sum(numeric_only=True)
# sumaila2018

missingCountries(sumaila2018)
Missing countries:
 Denmark
Netherlands
No missing countries

2. TAC/Catch

14.7

Sustainable fisheries as a proportion of GDP in small island developing States, least developed countries and all countries

We use two indicators:

  1. OHI Coastal ‘Livelihoods & economies’ Index: No further transformation
  2. OHI ‘Tourism & recreation’ Index: No further transformation

1. OHI ‘Livelihoods & economies’ Index

Source

Code
# OHI 'Livelihoods & economies' Index
# https://oceanhealthindex.org/global-scores/data-download/

ohiLive = pd.read_csv("../data/scoresOHI.csv")
# one combined mask: analysis countries, this goal, "score" dimension
keep = (
    ohiLive["region_name"].isin(countries)
    & (ohiLive["long_goal"] == "Livelihoods & economies")
    & (ohiLive["dimension"] == "score")
)
ohiLive = ohiLive[keep].pivot_table(
    index="region_name", columns="scenario", values="value", aggfunc="mean"
)

ohiLive[[2012, 2018, 2022]]

missingCountries(ohiLive)
scenario 2012 2018 2022
region_name
Belgium 67.75 80.50 80.50
Bulgaria 71.31 71.31 71.31
Croatia 78.76 78.56 78.56
Cyprus 65.03 63.18 63.18
Denmark 61.44 66.22 66.22
Estonia 59.66 73.81 73.81
Finland 75.44 82.24 82.24
France 79.40 79.41 79.41
Germany 86.50 89.31 89.31
Greece 64.92 61.38 61.38
Ireland 73.29 77.30 77.30
Italy 83.58 85.62 85.62
Latvia 77.74 89.50 89.50
Lithuania 70.18 76.21 76.21
Malta 84.24 86.67 86.67
Netherlands 60.89 62.19 62.19
Poland 80.23 81.50 81.50
Portugal 92.97 93.38 93.38
Romania 90.29 88.77 88.77
Spain 77.74 77.21 77.21
Sweden 100.00 100.00 100.00
United Kingdom 87.45 88.59 88.59
No missing countries

2. OHI ‘Tourism & recreation’ Index

Source

Code
# OHI 'Tourism & recreation' Index
# https://oceanhealthindex.org/global-scores/data-download/

ohiTour = pd.read_csv("../data/scoresOHI.csv")
# one combined mask: analysis countries, this goal, "score" dimension
keep = (
    ohiTour["region_name"].isin(countries)
    & (ohiTour["long_goal"] == "Tourism & recreation")
    & (ohiTour["dimension"] == "score")
)
ohiTour = ohiTour[keep].pivot_table(
    index="region_name", columns="scenario", values="value", aggfunc="mean"
)

ohiTour[[2012, 2018, 2022]]

missingCountries(ohiTour)
scenario 2012 2018 2022
region_name
Belgium 25.64 23.87 22.89
Bulgaria 23.30 27.91 19.88
Croatia 69.36 88.66 70.06
Cyprus 60.50 58.80 59.75
Denmark 23.47 27.84 22.73
Estonia 38.02 41.89 36.82
Finland 24.10 26.95 19.94
France 44.27 47.72 45.29
Germany 71.18 72.50 61.55
Greece 85.06 100.00 100.00
Ireland 23.52 22.06 21.68
Italy 54.93 65.70 57.67
Latvia 36.83 33.42 30.08
Lithuania 14.03 19.44 14.89
Malta 100.00 100.00 100.00
Netherlands 61.06 61.17 62.31
Poland 16.68 19.69 17.89
Portugal 61.92 89.12 72.35
Romania 20.15 24.25 22.50
Spain 40.22 48.27 41.12
Sweden 37.09 42.52 31.40
United Kingdom 50.05 53.90 50.69
No missing countries

14.a

Proportion of total research budget allocated to research in the field of marine technology

We use two indicators:

  1. Official UNSD indicator ER_RDE_OSEX: Max-Min transformation where the max-value and min-value are the maximum and minimum in the assessment period (2009-2017) of all European countries (not restricted to the countries in the analysis).
  2. SAD/TAC: Catch-weighted TAC relative to Scientific Advice

Note: ER_RDE_OSEX data comes from the Global Ocean Science Report (GOSR) 2020, and goes from 2013 to 2017. Data for 2009-2012 is available in the UNstats archive (download csv for 29-Mar-19)

1. Ocean science expenditure ER_RDE_OSEX

Several sources

Code
# EEZ area
# https://emodnet.ec.europa.eu/geonetwork/srv/eng/catalog.search#/metadata/d4a3fede-b0aa-485e-b4b2-77e8e3801fd0
# Total EEZ area (km2) per territory, restricted to European countries.
eez = (
    pd.read_csv(
        "../data/EMODnet_EEZ_v11_20210506.csv", delimiter=";", encoding="latin-1"
    )
    .loc[:, ["Territory", "Area_km2"]]
    .groupby(["Territory"])
    .sum(numeric_only=True)
)
eez = eez.loc[eez.index.isin(allEurope)]
Code
# %%time
# National ocean science expenditure as a share of total research and development funding (%), UNstats archive (years 2009-2012)
# https://unstats.un.org/sdgs/indicators/database/archive

# # read old data by chunks to speed loading and save 14.a.1 to separate file
# iter_csv = pd.read_csv('./data/AllData_Onward_20190329.csv', iterator=True, chunksize=1000, encoding='cp1252')
# oResearchOld = pd.concat([chunk[chunk.Indicator == '14.a.1'] for chunk in iter_csv])
# oResearchOld.to_csv('./data/archive14a1.csv', index=False)

# Archive extract (indicator 14.a.1 only) -> countries x years wide table.
oResearchOld = pd.read_csv("../data/archive14a1.csv").pivot(
    index="GeoAreaName", columns="TimePeriod", values="Value"
)
Code
# National ocean science expenditure as a share of total research and development funding (%), UNstats
# https://unstats.un.org/sdgs/dataportal/database

# read official data (series ER_RDE_OSEX) and merge with archive data
# NOTE(review): `sdg14` is assumed to be loaded earlier in the notebook with
# the raw UNstats SDG-14 series — confirm against the preceding cells.
oResearch = sdg14[sdg14["SeriesCode"] == "ER_RDE_OSEX"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# outer-join archive (2009-2012/13) with official data; the overlapping 2013
# column is suffixed "2013_x" (archive) / "2013_y" (official) by merge
oResearch = oResearchOld.merge(
    oResearch, left_index=True, right_index=True, how="outer"
)
# use all countries in Europe (keeps non-study countries so the max-min
# normalisation below spans the whole continent, as stated in the text)
oResearch = oResearch[oResearch.index.isin(allEurope)].dropna(how="all", axis=1)
# fill nan of year 2013 from new report with old report and 2016 with 2017
oResearch[2013] = oResearch["2013_y"].fillna(oResearch["2013_x"])
# oResearch[2016] = oResearch[2016].fillna(oResearch[2017])
# keep only the 2013-2017 assessment window (integer-labelled columns)
oResearch = oResearch[list(range(2013, 2018))]
# weighted by EEZ area: each country's value scaled by its share of total EEZ
oResearch = oResearch.merge(eez, left_index=True, right_index=True, how="outer")
for col in oResearch.drop("Area_km2", axis=1).columns:
    oResearch[col] = (
        oResearch[col] * oResearch["Area_km2"] / (oResearch["Area_km2"].sum())
    )
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# min/max are taken over the entire 2013-2017 panel (.min().min() collapses
# both axes), not per column
oResearch = (
    (oResearch.loc[:, 2013:2017] - oResearch.loc[:, 2013:2017].min().min())
    / (
        oResearch.loc[:, 2013:2017].max().max()
        - oResearch.loc[:, 2013:2017].min().min()
    )
    * 100
).round(2)
# fill nan with mean of column (imputes countries that reported no expenditure)
for col in oResearch.columns:
    oResearch[col] = oResearch[col].fillna(oResearch[col].mean())

# restrict back to the study countries for the composite
oResearch = oResearch[oResearch.index.isin(countries)]
oResearch[[2013, 2016, 2017]]

missingCountries(oResearch)
2013 2016 2017
Belgium 0.020000 0.000000 0.000
Bulgaria 0.100000 0.050000 0.060
Croatia 7.850000 6.764444 12.894
Cyprus 3.858182 6.764444 12.894
Denmark 3.858182 6.764444 12.894
Estonia 3.858182 6.764444 12.894
Finland 1.210000 1.230000 1.210
France 13.300000 6.764444 8.860
Germany 0.840000 0.650000 0.630
Greece 3.858182 6.764444 12.894
Ireland 3.858182 6.764444 100.000
Italy 9.550000 7.950000 5.030
Latvia 3.858182 6.764444 12.894
Lithuania 3.858182 6.764444 12.894
Malta 3.858182 6.764444 12.894
Netherlands 0.740000 0.620000 0.690
Poland 0.210000 0.130000 0.120
Portugal 3.858182 36.750000 12.894
Romania 0.890000 6.764444 12.894
Spain 7.730000 13.500000 12.340
Sweden 3.858182 6.764444 12.894
United Kingdom 3.858182 6.764444 12.894
No missing countries

2. SAD/TAC

Code
# If a country fishes only on fish stocks where assignment of TAC follows scientific advice, it would score 100

14.b

Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small‐scale fisheries

We use two indicators:

  1. OHI Artisanal Fishing Opportunities Index: No further transformation
  2. Percentage of Fish Species Threatened: No further transformation

1. OHI ‘Artisanal opportunities’ Index

Source

Code
# OHI 'Artisanal opportunities' Index
# https://oceanhealthindex.org/global-scores/data-download/

# Note: this goal uses the 'status' dimension (the other OHI goals above
# use 'score').
_art = pd.read_csv("../data/scoresOHI.csv")
_art = _art[
    _art["region_name"].isin(countries)
    & (_art["long_goal"] == "Artisanal opportunities")
    & (_art["dimension"] == "status")
]
ohiArt = _art.pivot_table(
    index="region_name", columns="scenario", values="value", aggfunc="mean"
)

ohiArt[[2012, 2018, 2022]]

missingCountries(ohiArt)
scenario 2012 2018 2022
region_name
Belgium 79.51 77.65 78.66
Bulgaria 56.01 55.59 63.71
Croatia 66.35 62.56 67.33
Cyprus 77.16 71.91 76.38
Denmark 70.50 77.24 74.63
Estonia 89.39 91.18 99.24
Finland 84.67 82.73 77.45
France 77.09 76.44 78.61
Germany 73.21 76.62 73.23
Greece 66.81 61.42 67.53
Ireland 69.90 73.46 73.52
Italy 63.11 64.18 68.29
Latvia 87.40 89.29 99.01
Lithuania 88.71 90.62 97.63
Malta 71.63 76.40 81.47
Netherlands 55.01 57.44 60.35
Poland 87.82 76.21 77.61
Portugal 77.81 65.46 66.08
Romania 67.55 69.64 79.19
Spain 73.45 70.66 73.08
Sweden 94.77 95.73 97.37
United Kingdom 81.35 82.74 79.79
No missing countries

2. Percentage of Fish Species Threatened

Source TBD

Code
# Percentage of Fish Species Threatened

# Waiting to get IUCN API key

# I would not use time series comparison because of this: https://www.iucnredlist.org/assessment/red-list-index

# https://fishbase.mnhn.fr/country/CountryChecklist.php?c_code=056&vhabitat=threatened&csub_code=
# Last updated 2019
# We can extract the data and calculate the percentage
# https://en.wikipedia.org/wiki/ISO_3166-1_numeric

# https://data.worldbank.org/indicator/EN.FSH.THRD.NO
# Data is only for 2018

#####################

# https://stats.oecd.org/Index.aspx?DataSetCode=WILD_LIFE#
# Here we have by country updated most recent.
# Missing countries: Bulgaria,Cyprus,Croatia,Ireland,Malta,Romania,United Kingdom

# Share of fish species NOT threatened, from the OECD wildlife dataset:
# keep the total-fish threatened-percentage rows and invert (100 - x)
# so that higher = better.
_wild = pd.read_csv("../data/WILD_LIFE_10062023112434707.csv")
_fish_pct = (_wild.SPEC == "FISH_TOT") & (_wild.IUCN == "THREAT_PERCENT")
threatened = (
    _wild.loc[_fish_pct, ["Country", "Value"]]
    .set_index("Country")
    .rename(columns={"Value": 2021})
)
threatened = threatened[threatened.index.isin(countries)]
threatened[2021] = 100 - threatened[2021]
threatened

missingCountries(threatened)
2021
Country
Belgium 79.577
Greece 90.526
Netherlands 76.289
Poland 80.159
Portugal 66.154
Sweden 84.848
Estonia 91.262
Italy 92.668
Finland 86.364
Germany 75.127
Latvia 97.701
Lithuania 95.238
France 84.049
Spain 96.338
Denmark 96.639
Missing countries:
 Bulgaria
Cyprus
Croatia
Ireland
Malta
Romania
United Kingdom

14.c

Number of countries making progress in ratifying, accepting and implementing through legal, policy and institutional frameworks, ocean-related instruments that implement international law, as reflected in the United Nations Convention on the Law of the Sea

We use two indicators:

  1. Participation in agreements of the International Marine Organization (IMO Participation Rate):
  2. Measures under the Marine Strategy Framework Directive:

1. Participation in agreements of the International Marine Organization

Source

Code
# dictionary for country codes used by the GISIS
with open("../data/gisisDict.csv") as csv_file:
    reader = csv.reader(csv_file)
    gisisDict = dict(reader)

gisisDict = {v: k for k, v in gisisDict.items()}
Code
# Participation in agreements of the International Marine Organization

# https://www.imo.org/en/About/Conventions/Pages/StatusOfConventions.aspx Excel file with current status of IMO conventions
# We get the historical data from the GISIS database: https://gisis.imo.org/Public/ST/Ratification.aspx
# You need to create account to access data.

# I tried to scrape the data but I am getting errors with Selenium and bs4.
# I downloaded the html manually

# Restrict the GISIS dictionary to the study countries and build one
# ratification-status URL per GISIS country id.
gisisCountries = {
    country: cid for country, cid in gisisDict.items() if country in countries
}
listIMO = [
    "https://gisis.imo.org/Public/ST/Ratification.aspx?cid=" + str(cid)
    for cid in gisisCountries.values()
]

# for i in listIMO:
#     webbrowser.open(i)
Code
# Per-country count of IMO treaties in force in 2012/2018/2021, later
# converted to a percentage of all possible treaties.
imoRatedf = pd.DataFrame(columns=["Country", 2012, 2018, 2021])

# loop thru the html files in the folder and extract the data
# (one manually saved GISIS "Ratification of Treaties" page per country)
for i in range(len(os.listdir("../data/treatiesIMO/"))):
    imoRate = pd.read_html(
        "../data/treatiesIMO/Status of Treaties » Ratification of Treaties{}.html".format(
            i
        )
    )[4]  # the fifth table on the page holds the treaty list
    # get the country name from the html file name by checking if string is in the list of countries
    for country in countries:
        if country in imoRate["Treaty"][0]:
            countryIMO = country
    # the second row of the scraped table contains the real column headers
    imoRate.columns = imoRate.iloc[1]
    imoRate = imoRate[2:]
    # new column with the year of accession and denounced
    # accession = text before the first '(' ; denounced = text after ':'
    # inside the parentheses (e.g. "01-Jan-2000 (denounced: 01-Jan-2010)")
    imoRate["accession"] = (
        imoRate["Date of entry into force in country"]
        .str.extract("^([^(]+)")
        .fillna("")
    )
    imoRate["denounced"] = imoRate["Date of entry into force in country"].str.extract(
        ".*\\:(.*)\\).*"
    )
    # parse both date columns and keep only the year
    imoRate[["accession", "denounced"]] = (
        imoRate[["accession", "denounced"]]
        .apply(pd.DatetimeIndex)
        .apply(lambda x: x.dt.year)
    )
    # count the number of treaties each country accessioned and didn't denounced by 2012, 2018 and 2021
    # NOTE(review): this inner loop reuses `i`, shadowing the file-index loop
    # variable above; harmless because `for` rebinds `i` on each outer
    # iteration, but worth renaming if the code is extended.
    for i in (2012, 2018, 2021):
        imoRate[str(i)] = np.where(
            (imoRate.accession < i)
            & ((imoRate.denounced > i) | (imoRate.denounced.isna())),
            1,
            0,
        )
    imoCount = (
        countryIMO,
        imoRate["2012"].sum(),
        imoRate["2018"].sum(),
        imoRate["2021"].sum(),
    )
    # append one row per country
    imoRatedf.loc[len(imoRatedf), imoRatedf.columns] = imoCount
# calculate total possible treaties, apply dif-ref and convert to percentage
# (uses the last scraped page to count treaties with an entry-into-force date)
totalIMO = len(imoRate.dropna(subset=["Date of entry into force in country"]))
imoRatedf = imoRatedf.set_index("Country").sort_index()
# difference-to-reference then rescale: equivalent to 100 * count / totalIMO
imoRatedf = 100 - 100 * (totalIMO - imoRatedf).divide(totalIMO)
imoRatedf = imoRatedf.astype(np.float64)
imoRatedf = imoRatedf.apply(pd.to_numeric)
imoRatedf

missingCountries(imoRatedf)
2012 2018 2021
Country
Belgium 78.431373 84.313725 90.196078
Bulgaria 76.470588 80.392157 82.352941
Croatia 72.549020 74.509804 74.509804
Cyprus 68.627451 70.588235 72.549020
Denmark 80.392157 88.235294 92.156863
Estonia 76.470588 78.431373 82.352941
Finland 76.470588 86.274510 90.196078
France 84.313725 90.196078 96.078431
Germany 80.392157 88.235294 88.235294
Greece 80.392157 84.313725 84.313725
Ireland 66.666667 68.627451 68.627451
Italy 72.549020 74.509804 74.509804
Latvia 80.392157 80.392157 82.352941
Lithuania 58.823529 62.745098 64.705882
Malta 60.784314 66.666667 66.666667
Netherlands 84.313725 90.196078 92.156863
Poland 80.392157 84.313725 86.274510
Portugal 68.627451 78.431373 86.274510
Romania 58.823529 62.745098 64.705882
Spain 86.274510 88.235294 88.235294
Sweden 82.352941 92.156863 94.117647
United Kingdom 78.431373 78.431373 78.431373
No missing countries

2. Measures under the Marine Strategy Framework Directive

See code for notes

Code
# The barplot here: https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018DC0562&from=EN
# Comes from https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018SC0393

# The analogous report for 2020 is here https://environment.ec.europa.eu/system/files/2023-04/C_2023_2203_F1_COMMUNICATION_FROM_COMMISSION_EN_V5_P1_2532109.PDF
# But the assessment is much shorter. They refer the reader to a JRC report:
# https://publications.jrc.ec.europa.eu/repository/handle/JRC129363
# That report assesses all the descriptors, but it cannot be compared to the previous assessment.
# Moreover, the source code and data are not available.

# Overall, it is hard to make an indicator for the measures taken against pressure indicators by the MS.
# Countries report different measures and data is poor.

Indicators aggregation

Given our ratio-scale, fully comparable indicators \(I_{it}\), a meaningful aggregation of \(N\) indicators into a composite indicator \(CI_t\) is obtained according to social choice theory by applying a generalized mean:

\[CI_t(\alpha_{it},I_{it},\sigma) = \left(\sum^N_{i=1}\alpha_{it}I^{\frac{\sigma-1}{\sigma}}_{it}\right)^{\frac{\sigma}{\sigma-1}} \quad \text{for} \quad t = 2012, 2018, 2021 \text{(or most recent)}\]

with weights \(\alpha_{it} > 0\) and \(0 \leq \sigma \leq \infty\). The parameter \(\sigma\) is used to quantify the elasticity of substitution between the different indicators. High (low) values of \(\sigma\) imply good (poor) substitution possibilities which means that a high score in one indicator can (cannot) compensate a low score in another indicator. Consequently, high and low values of \(\sigma\) correspond to concepts of weak and strong sustainability, respectively. Depending on \(\sigma\), one can obtain a full class of specific function forms for the composite indicator.

We define:

\(\sigma_{Target} = 0.5\) and \(\sigma_{Target} = 10\)

Code
# fmt: off
# missing 14.2, mortality, biomass, tacCatch, sadTac, msfd
# Collect every indicator dataframe built above; order must stay aligned
# with varNames (the two lists are zipped together below).
varDf = [ nitro, wasteG, wasteR,scoreESD, co2pc, mpa, ohiBio, fseScore, 
         ohiLive, ohiTour, oResearch, ohiArt, threatened, imoRatedf
]
varNames = [ "nitro", "wasteG", "wasteR", "scoreESD", "co2pc", 
            "mpa", "ohiBio", "fseScore", "ohiLive", "ohiTour",
              "oResearch", "ohiArt", "threatened", "imoRatedf",
]
# fmt: on
# name -> dataframe lookup used by the stacking/merging step
dictIndicators = dict(zip(varNames, varDf))

# stack variables in each dataframe: each countries x years wide table
# becomes a single column named after the indicator, indexed by
# (Country, Year)
for name, df in dictIndicators.items():
    df = df.stack().to_frame().rename(columns={0: str(name)})
    df.index.names = ["Country", "Year"]
    df.reset_index(inplace=True)
    df.Year = df.Year.astype(int)
    df.set_index(["Country", "Year"], inplace=True)
    dictIndicators[name] = df
# merge all variables into one dataframe, forward and back fill by country
# (so every country has a value for every year 2012-2021)
indicators = pd.concat(dictIndicators.values(), axis=1, join="outer")
indicators = indicators.reset_index().sort_values(["Country", "Year"])
indicators = indicators[indicators.Year.isin(list(range(2012, 2022)))]
indicators = indicators.groupby(["Country"], group_keys=False).apply(
    lambda x: x.ffill().bfill()
)
indicators = indicators.set_index(["Country", "Year"])
# mapping of SDG-14 target -> the (two) indicators aggregated into it;
# names not present in `indicators` (mortality, tacCatch, sadTac, msfd)
# make the corresponding target fail with KeyError below, by design
indicatorsL = {
    "plastic": ["wasteG", "wasteR"],
    "14.1": ["nitro", "plastic"],
    # '14.2':[''],
    "14.3": ["scoreESD", "co2pc"],
    "14.4": ["mortality", "biomass"],
    "14.5": ["mpa", "ohiBio"],
    "14.6": ["fseScore", "tacCatch"],
    "14.7": ["ohiLive", "ohiTour"],
    "14.a": ["oResearch", "sadTac"],
    "14.b": ["ohiArt", "threatened"],
    "14.c": ["imoRatedf", "msfd"],
}

# calculate composite indicators for each target importing the function from composite.py
# equal weights (alpha = 1/N) and sigma = 10 (weak-sustainability aggregation)
targets = indicators.copy()
for target, indicator in indicatorsL.items():
    try:
        alpha = 1 / len(indicatorsL[target])
        df = targets[indicator]
        sigma = 10
        targets[target] = ci.compositeDF(alpha, df, sigma)
    except KeyError:
        # one of the component indicators is not available yet
        print("Missing indicator for", target)

# keep only the target-level composites (columns named "14.x")
targets = targets[[i for i in targets if i.startswith("14")]]
targets
Missing indicator for 14.4
Missing indicator for 14.6
Missing indicator for 14.a
Missing indicator for 14.c
14.1 14.3 14.5 14.7 14.b
Country Year
Belgium 2012 49.639741 73.370139 87.232641 46.200186 79.543499
2013 50.713405 73.474526 86.048292 52.202161 79.483495
2014 39.106651 75.883383 85.798948 52.808338 79.318458
2015 39.025223 74.454387 85.778589 52.449158 79.203412
2016 53.388710 75.098105 85.055422 51.491526 79.003292
... ... ... ... ... ... ...
United Kingdom 2017 59.267750 87.862081 84.925435 69.610536 38.798780
2018 58.873452 88.511220 84.489188 71.031424 38.303437
2019 58.873452 89.605287 84.007510 70.897766 37.849758
2020 58.873452 92.172006 85.243933 69.011947 37.516443
2021 58.873452 92.172006 85.289779 69.378426 36.710932

220 rows × 5 columns

Monte Carlo simulation

Code source is in the composite.py file

Code
%%time
# calculate composite score for each target importing the function from composite.py
# monte carlo for strong sustainability and one sigma for weak sustainability
# NOTE(review): compositeMC presumably returns per-country meanScore/stdScore
# columns over the 10 simulations (those columns are used in the plots below)
# — confirm against composite.py.
scoresStrong = ci.compositeMC(targets, years=[2012, 2016, 2021], simulations=10)
# weak sustainability: one deterministic composite with sigma=10 and equal weights
scoresWeak = pd.DataFrame(ci.compositeDF(alpha = 1 / len(targets.columns), df = targets, sigma = 10), columns=['scoreWeak'])
CPU times: user 228 ms, sys: 19.7 ms, total: 247 ms
Wall time: 242 ms

Plots

Code
InteractiveShell.ast_node_interactivity = "last_expr"  # last_expr
Code
# merge all data and calculate min, max, and EEZ-weigthed average for indicators and targets

# inner-join indicators, target composites and both sustainability scores
# on the shared (Country, Year) index
data_frames = [indicators, targets, scoresStrong, scoresWeak]
fullData = reduce(
    lambda left, right: pd.merge(
        left, right, left_index=True, right_index=True, how="inner"
    ),
    data_frames,
)

# long format (one row per Country/Year/indicator) joined with EEZ areas
eezAverage = (
    pd.DataFrame(fullData.stack().reset_index())
    .merge(eez, left_on="Country", right_on="Territory", how="left")
    .rename(columns={0: "value", "level_2": "indicator"})
)

# EEZ-weighted average per (Year, indicator):
# sum(value * area) / sum(area) over countries
eezAverage["valueEEZ"] = eezAverage.value * eezAverage.Area_km2
eezAverage = (
    eezAverage.groupby(["Year", "indicator"])
    .sum(numeric_only=True)
    .drop("value", axis=1)
)
eezAverage["averageEEZ"] = eezAverage.valueEEZ / eezAverage.Area_km2
Code
# define function to plot boxplots

def plotBox(df, xaxis_title="", yaxis_title="", xlegend=0.0, ylegend=1.0,
            maxShift=10.0, minShift=-10.0):
    """Box plot of `value` per `indicator`, annotated with extremes.

    For each indicator the countries attaining the maximum and minimum are
    written next to the whiskers ('*' when more than 4 countries tie), and
    the 2021 EEZ-weighted average (from the module-level `eezAverage` frame)
    is overlaid as a black marker.

    Parameters
    ----------
    df : DataFrame with columns 'Country', 'indicator', 'value'.
    xaxis_title, yaxis_title : axis labels.
    xlegend, ylegend : legend position in paper coordinates.
    maxShift, minShift : vertical pixel offset of the max/min annotations.

    Fix vs. previous version: the defaults used to be the *type objects*
    `str`/`float`, which would have been passed through as values if a
    caller omitted an argument; they are now sensible concrete defaults.
    """
    fig = px.box(df, x="indicator", y="value")

    annoList = []
    maxCountriesD = {}
    minCountriesD = {}

    def _annotate_extremes(pick, shift, store):
        # One annotation per indicator at its extreme value (pick = np.max
        # or np.min), listing the attaining countries; ties of more than 4
        # countries collapse to '*' with the full list kept in `store`.
        for x, s in enumerate(df.indicator.unique()):
            extremeVal = pick(df.loc[df["indicator"] == s, "value"])
            extreme = df.loc[
                (df["value"] == extremeVal) & (df["indicator"] == s), "Country"
            ].values
            if len(extreme) > 4:
                store[s] = extreme
                extreme = "*"
            annoList.append(
                dict(
                    x=x,
                    y=extremeVal,
                    text=",<br>".join(extreme),
                    yshift=shift,
                    showarrow=False,
                )
            )

    _annotate_extremes(np.max, maxShift, maxCountriesD)
    _annotate_extremes(np.min, minShift, minCountriesD)

    # add EEZ-weighted average values (year 2021) as black markers
    indicatorAverage = eezAverage.loc[(2021, df.indicator.unique()), :].reset_index()
    for s in indicatorAverage.indicator.unique():
        # .iloc[0] instead of float(Series): float() on a Series is deprecated
        averageVal = indicatorAverage.loc[
            indicatorAverage["indicator"] == s, "averageEEZ"
        ].iloc[0]
        fig.add_scatter(
            x=[s],
            y=[averageVal],
            mode="markers",
            legendgroup="EEZ average",
            name="EEZ average",
            marker=dict(color="black", size=6),
        )

    fig.update_layout(annotations=annoList)

    # hide all per-trace legend entries, then add a single dummy trace so
    # the legend shows exactly one 'EEZ average' entry
    fig.update_traces(showlegend=False).add_scatter(
        x=[s],
        y=[averageVal],
        mode="markers",
        legendgroup="EEZ average",
        name="EEZ average",
        marker=dict(color="black", size=6),
    )

    # legend position and axis titles
    fig.update_layout(
        legend=dict(
            x=xlegend,
            y=ylegend,
            traceorder="normal",
            font=dict(family="sans-serif", size=12, color="black"),
            bgcolor="White",
        ),
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
    )

    fig.show()
Code
# plot min, max, and EEZ-weigthed average for indicators and for targets

def _yearLong(df, year):
    # Reshape the (Country, Year)-indexed wide frame into long
    # (Country, indicator, value) rows for a single year.
    return (
        df[df.index.isin([year], level=1)]
        .stack()
        .reset_index()
        .rename(columns={0: "value", "level_2": "indicator"})
    )

# indicator-level boxes: keep only the raw indicator columns and
# abbreviate country names so the annotations stay compact
indicatorsBox = _yearLong(fullData, 2021)
indicatorsBox = indicatorsBox[indicatorsBox.indicator.isin(varNames)]
indicatorsBox.Country = indicatorsBox.Country.map(country_to_abbrev)

# target-level boxes: keep only the composite target columns
targetsBox = _yearLong(fullData, 2021)
targetsBox = targetsBox[targetsBox.indicator.isin(list(indicatorsL.keys()))]

plotBox(indicatorsBox, "Indicators", "Percentage", xlegend=0.85, ylegend=0.2, maxShift=30, minShift=-10)
plotBox(targetsBox, "Targets", "Percentage", xlegend=0, ylegend=1, maxShift=10, minShift=-10)
Code
# plot comparing strong and weak sustainability
df = fullData.reset_index()

# rank countries within each year (rank 1 = best score)
df["weakRank"] = df.groupby("Year")["scoreWeak"].rank(ascending=False)
df["strongRank"] = df.groupby("Year")["meanScore"].rank(ascending=False)
# NOTE(review): stdScore (0-100 score units) is rescaled into rank units by
# n_countries/100 so it can be drawn as an error bar on the rank axis —
# this is an approximation, confirm the intended scaling.
df["weakStd"] = df["stdScore"] * len(df["Country"].unique()) / 100 

for year in [2012, 2016, 2021]:
    # weak-sustainability rank (x) vs strong-sustainability rank (y),
    # one labelled point per country
    fig = px.scatter(
        df[df.Year == year],
        x="weakRank",
        y="strongRank",
        text="Country",
        title=str(year),
        error_y=np.array(df[df.Year == year]["weakStd"]),
    )
    # hide the markers themselves; keep only labels and error bars
    fig.update_traces(textposition="bottom right", marker=dict(
        color='black', size=0),error_y=dict(width=0)
    )
    # add 45 degree line (countries on it rank the same under both concepts)
    fig.update_layout(
        shapes=[
            {
                "type": "line",
                "yref": "paper",
                "xref": "paper",
                "y0": 0,
                "y1": 1,
                "x0": 0,
                "x1": 1,
                "line": {"color": "grey", "width": 2},
                "layer": "below",
            }
        ]
    )
    # change axis titles, fix aspect ratio so the 45-degree line is meaningful
    fig.update_layout(
        xaxis_title=r"$\text{Ranking with unlimited substitution possibilities} \quad \sigma= \infty$",
        yaxis_title=r"$\text{Ranking with limited substitution possibilities} \quad \sigma=\text{U(0,1)}$",
        font=dict(family="sans-serif"),
        autosize=False,
        width=800,
        height=800,
        yaxis=dict(range=[0, len(df["Country"].unique()) + 3]),
        xaxis=dict(range=[0, len(df["Country"].unique()) + 3]),
    )
    fig.show()

Trash bin

Code
# Get the targets for 2020 and 2030 in percentage
# Member State greenhouse gas emission limits in 2020 and 2030 compared to 2005 greenhouse gas emissions levels
# There targets for 2020 and for 2030
# https://www.eea.europa.eu/data-and-maps/figures/national-progress-towards-greenhouse-gas
# Official journals with the data can be found at (https://climate.ec.europa.eu/eu-action/effort-sharing-member-states-emission-targets_en)

# limitESR = pd.read_excel('../data/targetsESR/FIG2-154307-CLIM058-v2-Data.xlsx', sheet_name=1, skiprows=19, header=1, skipfooter=32)
# limitESR = limitESR.rename(columns={'Unnamed: 0':'Country', '(Resulting) ESR target 2030 (AR4)':'2030ESRtarget','ESR limit for 2020':'2020ESRtarget', '2005 ESD BJ':'2005Level'})
# limitESR = limitESR[['Country', '2020ESRtarget','2030ESRtarget','2005Level']]
# limitESR.set_index('Country', inplace=True)
# limitESR = limitESR[limitESR.index.isin(countries)]

# #UK is not in the dataset, we need to add from the official journal

# for i in countries:
#     if i not in limitESR.index:
#         print(i, 'is not in the dataset')

# limitESR
Code
# These data SHALL NOT BE USED. See reason on why ENV_AIR_GGE is preferable for the calculation:
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_emission_inventories)
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_air_emissions_accounts&oldid=551152#Greenhouse_gas_emissions)
# CO2, KG_HAB, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AC_AINAH_R2/


# co2 = pd.read_csv('../data/env_ac_ainah_r2.csv')
# co2 = co2[co2['geo'].isin(countryAbb)]
# co2['geo'] = co2['geo'].map(abbrev_to_country).fillna(co2['geo'])
# co2 = co2.pivot_table(columns='TIME_PERIOD', index='geo', values='OBS_VALUE', aggfunc='mean')

# mr = co2.columns[-1] # most recent year
# # co2[[2012, 2016, mr]]/1000
Code
# From 14.6, using Eurostat data for landing values.

# Even though the USD-EUR discrepancy does not affect the ratio we calculate,
# we get today's exchange rate to convert landing values to USD and have a consistent unit
# Solution source: https://stackoverflow.com/a/17250702/14534411


# r = requests.get('http://www.ecb.int/stats/eurofxref/eurofxref-daily.xml', stream=True)
# tree = ET.parse(r.raw)
# root = tree.getroot()
# namespaces = {'ex': 'http://www.ecb.int/vocabulary/2002-08-01/eurofxref'}
# currency = 'USD'
# match = root.find('.//ex:Cube[@currency="{}"]'.format(currency.upper()), namespaces=namespaces)
# eurTOusd = float(match.attrib['rate'])

# Landings of fishery products, TOTAL FISHERY PRODUCTS, Euro, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/FISH_LD_MAIN/

# landing = pd.read_csv('../data/fish_ld_main.csv')
# landing['Country'] = landing.geo.map(abbrev_to_country).fillna(landing.geo)
# landing = landing[landing.Country.isin(countries)]
# landing = landing[['Country', 'TIME_PERIOD', 'OBS_VALUE', 'unit']]
# landing['landingUSD'] = landing.OBS_VALUE * eurTOusd
# landing.pivot_table(columns='TIME_PERIOD', index='Country', values='landingUSD', aggfunc='mean')